# Importing packages ----
library(readr)
library(dplyr)
library(plotly)
library(zoo)
library(ggplot2)
library(gganimate)
library(ggthemes)
library(ggmap)
library(animation)
library(highcharter)
library(stringr)
library(PerformanceAnalytics)
library(maps)
library(ggpubr)
library(tidyr)
library(tidytext)
library(wordcloud2)
library(ngram)
library(qdapRegex)

# Data loading ----
# historical_data <- read_rds("../data/historical_web_data_26112015.rds")
iran_earthquake <- read_rds("../data/iran_earthquake.rds")
# disasters <- read_delim("../data/signif.txt", delim = "\t")
worldwide <- read_csv("../data/worldwide.csv")
Southern_Cal <- read_csv("../data/earth007.csv")
# italy = scan(file = "C:/Users/HP/Desktop/listfiles.txt", what = character(), sep = "\n")

# Probability of having an earthquake with an intensity above 6 on the Richter scale:
# Iran dataset ----
# Density of recorded magnitudes in the Iran catalogue, plus a normality check.
tmp <- iran_earthquake %>%
  filter(!is.na(Mag))

# Plot colours reused by later figures.
fill <- "#4271AE"
line <- "#1F3552"

ggplot(tmp, aes(x = Mag)) +
  geom_density(size = 1) +
  xlab("Magnitude") +
  ylab("Density") +
  theme_bw() +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 14))

# Normal Q-Q plot of magnitudes.
qqnorm(tmp$Mag)

# shapiro.test() is limited to 5000 observations, so test a random subsample.
# NOTE(review): no set.seed() here, so the subsample (and the exact W) is
# not reproducible across runs.
a <- sample(tmp$Mag, 5000)
shapiro.test(a)
##
## Shapiro-Wilk normality test
##
## data: a
## W = 0.96133, p-value < 2.2e-16
# Restrict to the modern, reasonably complete part of the catalogue:
# events since 2010 with magnitude >= 2.
iran <- iran_earthquake %>%
  filter(format(as.Date(OriginTime, format = "%Y-%m-%d %H:%M:%S"), "%Y") >= 2010) %>%
  filter(Mag >= 2)

# Observation window in years. difftime() has no "year" unit, so convert
# from weeks (52.25 weeks per year, accounting for leap days).
duration <- difftime(max(iran$OriginTime, na.rm = TRUE),
                     min(iran$OriginTime, na.rm = TRUE),
                     units = "week")
duration <- as.numeric(duration) / 52.25
duration
## [1] 5.892088
# Gutenberg-Richter analysis for Iran: N is the annual rate of events with
# magnitude >= M (cumulative counts from the largest magnitude down).
# (The original computed this identical pipeline twice; once is enough.)
tmp <- iran %>%
  group_by(Mag) %>%
  summarise(n = n()) %>%
  na.omit() %>%
  arrange(-Mag) %>%
  mutate(N = cumsum(n) / duration, logN = log10(N)) %>%
  filter(Mag <= 12)

# Annual exceedance rate on a linear scale.
ggplot(tmp, aes(x = Mag, y = N)) +
  geom_point() +
  xlab("Magnitude") +
  ylab("Number of Occurrence per Year") +
  theme_bw() +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 14))

# Same data on a log10 y-axis; the linear fit is the Gutenberg-Richter law
# log10(N) = a - b * M.
ggplot(tmp, aes(x = Mag, y = N)) +
  geom_smooth(method = "lm") +
  geom_point() +
  scale_y_log10() +
  xlab("Magnitude") +
  ylab("Number of Occurrence per Year") +
  theme_bw() +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 14))

# Fit the law and derive the exponential-model parameters.
model <- lm(logN ~ Mag, data = tmp)
alpha <- nrow(iran) / duration                        # overall event rate per year
beta <- -as.numeric(model$coefficients[2]) * log(10)  # slope converted to natural log

# Probability of at least one event with magnitude >= M within D years,
# assuming a Poisson process with rate alpha * exp(-beta * M).
R_Iran <- function(M, D) {
  x <- exp(-beta * M)
  y <- exp(-alpha * D * x)
  1 - y
}

# Expected return period (years) of an event with magnitude >= M.
Time_Iran <- function(M) {
  exp(beta * M) / alpha
}

Time_Iran(6)
## [1] 19.04742
Time_Iran(7)
## [1] 132.5398
R_Iran(7, 5)
## [1] 0.03702182
R_Iran(7, 10)
## [1] 0.07267303
# Southern California dataset ----
# Magnitude density for the Southern California catalogue.
tmp <- Southern_Cal %>%
  filter(!is.na(mag))

fill <- "#4271AE"
line <- "#1F3552"

ggplot(tmp, aes(x = mag)) +
  geom_density(size = 1) +
  xlab("Magnitude") +
  ylab("Density") +
  theme_bw() +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 14))

# Observation window in years.
# NOTE(review): the column is named `YYY/MM/DD` but is parsed with
# format = "%m/%d/%Y" -- confirm against the raw file.
duration <- difftime(max(as.Date(Southern_Cal$`YYY/MM/DD`, format = "%m/%d/%Y"), na.rm = TRUE),
                     min(as.Date(Southern_Cal$`YYY/MM/DD`, format = "%m/%d/%Y"), na.rm = TRUE),
                     units = "week")
duration <- as.numeric(duration) / 52.25
duration
## [1] 6.985646

# Gutenberg-Richter analysis, as for Iran above.
# (The original computed this identical pipeline twice; once is enough.)
tmp <- Southern_Cal %>%
  group_by(mag) %>%
  summarise(n = n()) %>%
  na.omit() %>%
  arrange(-mag) %>%
  mutate(N = cumsum(n) / duration, logN = log10(N)) %>%
  filter(mag <= 12)

# Annual exceedance rate on a linear scale.
ggplot(tmp, aes(x = mag, y = N)) +
  geom_point() +
  xlab("Magnitude") +
  ylab("Number of Occurrence per Year") +
  theme_bw() +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 14))

# Log10 y-axis with the Gutenberg-Richter linear fit.
ggplot(tmp, aes(x = mag, y = N)) +
  geom_smooth(method = "lm") +
  geom_point() +
  scale_y_log10() +
  xlab("Magnitude") +
  ylab("Number of Occurrence per Year") +
  theme_bw() +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 14))

model <- lm(logN ~ mag, data = tmp)
alpha <- nrow(Southern_Cal) / duration                # overall event rate per year
beta <- -as.numeric(model$coefficients[2]) * log(10)  # slope converted to natural log

# Probability of at least one event with magnitude >= M within D years.
R_Cal <- function(M, D) {
  x <- exp(-beta * M)
  y <- exp(-alpha * D * x)
  1 - y
}

# Expected return period (years) of an event with magnitude >= M.
Time_Cal <- function(M) {
  exp(beta * M) / alpha
}

Time_Cal(6)
## [1] 5.587094
Time_Cal(7)
## [1] 39.77481
R_Cal(7, 5)
## [1] 0.1181274
R_Cal(7, 10)
## [1] 0.2223008
# How can we predict an earthquake by analyzing its foreshocks? ----
# For each place, compute the waiting time until the next recorded event
# (difference between consecutive `time` values within the same place).
tmp <- worldwide %>%
  select(time, mag, place) %>%
  na.omit() %>%
  group_by(place) %>%
  mutate(nextEarthquakeIn = lead(time) - time) %>%
  ungroup()
# Now let's take a look at the distribution of waiting times between earthquakes:
# Density of waiting times between consecutive earthquakes at the same place.
ggplot(tmp, aes(x = nextEarthquakeIn)) +
  geom_density(size = 1) +
  xlab("Time of Next Earthquake (Second)") +
  ylab("Density") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  )
# The distribution is heavy-tailed, so we examine it on log-log axes below.
# Bin waiting times into one-minute bins (bin centre = 60*k + 30 seconds),
# then, for each observed bin count n, take the mean bin centre and the
# relative frequency, and plot on log-log axes with a linear fit.
nextEarthquakeIn <- tmp %>%
  select(nextEarthquakeIn) %>%
  na.omit() %>%
  arrange(nextEarthquakeIn) %>%
  mutate(binNo = as.integer(nextEarthquakeIn / 60) * 60 + 30) %>%
  group_by(binNo) %>%
  summarise(n = n()) %>%
  group_by(n) %>%
  summarise(Next = mean(binNo)) %>%
  mutate(Prob = n / sum(n))

# Fixed: the original aes() call had a dangling trailing comma.
ggplot(nextEarthquakeIn, aes(x = Next, y = Prob)) +
  scale_x_log10() +
  scale_y_log10() +
  geom_smooth(method = "lm") +
  geom_point() +
  xlab("Time of Next Earthquake (Second)") +
  ylab("Density") +
  theme_bw() +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 14))
# Next, the empirical cumulative distribution of the waiting times.
# Empirical CDF of the one-minute-binned waiting times.
tmp2 <- tmp %>%
  select(nextEarthquakeIn) %>%
  na.omit() %>%
  arrange(nextEarthquakeIn) %>%
  mutate(Next = as.integer(nextEarthquakeIn / 60) * 60 + 30) %>%
  group_by(Next) %>%
  summarise(n = n()) %>%
  mutate(CumProb = cumsum(n) / sum(n))
# We plot the cumulative distribution function:
# Cumulative probability of the next earthquake occurring within `Next` seconds.
ggplot(tmp2, aes(x = Next, y = CumProb)) +
  geom_line(size = 1) +
  xlab("Time of Next Earthquake (Second)") +
  ylab("Cumulative Probability") +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14)
  )
# A helper to read forecast probabilities off the empirical CDF:
# Probability that the next earthquake occurs in the window [min, max)
# seconds, read off the empirical CDF in tmp2.
# NOTE: the parameters shadow base::min/base::max (kept for interface
# compatibility); function calls still resolve to the base functions.
probForecast <- function(min, max) {
  x <- tmp2 %>%
    filter(Next < max & Next >= min)
  # An empty window carries no observed probability mass (the original
  # returned -Inf with warnings in this case).
  if (nrow(x) == 0) {
    return(0)
  }
  max(x$CumProb) - min(x$CumProb)
}
# Forecast probabilities for successive 12-hour windows:
# Probability that the next event falls in each successive 12-hour window.
probForecast(0, 12 * 60 * 60)
## [1] 0.2332374
probForecast(12 * 60 * 60, 2 * 12 * 60 * 60)
## [1] 0.06297362
probForecast(2 * 12 * 60 * 60, 3 * 12 * 60 * 60)
## [1] 0.04182254
probForecast(3 * 12 * 60 * 60, 4 * 12 * 60 * 60)
## [1] 0.03261391
# NOTE(review): the line below repeats the second window -- possibly
# intended to be the fifth window (4*12*60*60, 5*12*60*60).
probForecast(12 * 60 * 60, 2 * 12 * 60 * 60)
## [1] 0.06297362
# Is there any correlation between the intensity of an earthquake and its depth? ----
# Scatter of magnitude against depth for the worldwide catalogue.
data <- worldwide %>%
  select(mag, depth) %>%
  na.omit()

ggplot(data, aes(x = depth, y = mag)) +
  geom_point(colour = fill, alpha = 0.3) +
  theme_bw() +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 14)) +
  xlab("Depth") +
  ylab("Magnitude")

# Average magnitude within 10-km depth bins, with a smoothed trend line.
data2 <- data %>%
  mutate(Depth = as.integer(depth / 10)) %>%
  group_by(Depth) %>%
  summarise(Mag = mean(mag)) %>%
  mutate(Depth = 10 * Depth)

ggplot(data2, aes(x = Depth, y = Mag)) +
  geom_point() +
  geom_smooth(size = 1) +
  theme_bw() +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 14)) +
  xlab("Depth") +
  ylab("Magnitude")